{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cd3c9269-a19e-45ce-ab89-07038aa0c230",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ff4ac05d-eb4a-46ca-8aea-1ce3a8615ee3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      Unnamed: 0  index                                          Job Title  \\\n",
      "0              0      0                              Senior Data Scientist   \n",
      "1              1      1                  Data Scientist, Product Analytics   \n",
      "2              2      2                               Data Science Manager   \n",
      "3              3      3                                       Data Analyst   \n",
      "4              4      4                             Director, Data Science   \n",
      "...          ...    ...                                                ...   \n",
      "3904        3904   4375                                  AWS Data Engineer   \n",
      "3905        3905   4376                              Data Analyst â Junior   \n",
      "3906        3906   4377                   Security Analytics Data Engineer   \n",
      "3907        3907   4378                   Security Analytics Data Engineer   \n",
      "3908        3908   4379  Patient Safety Physician or Safety Scientist -...   \n",
      "\n",
      "                   Salary Estimate  \\\n",
      "0     $111K-$181K (Glassdoor est.)   \n",
      "1     $111K-$181K (Glassdoor est.)   \n",
      "2     $111K-$181K (Glassdoor est.)   \n",
      "3     $111K-$181K (Glassdoor est.)   \n",
      "4     $111K-$181K (Glassdoor est.)   \n",
      "...                            ...   \n",
      "3904   $55K-$112K (Glassdoor est.)   \n",
      "3905   $55K-$112K (Glassdoor est.)   \n",
      "3906   $55K-$112K (Glassdoor est.)   \n",
      "3907   $55K-$112K (Glassdoor est.)   \n",
      "3908   $55K-$112K (Glassdoor est.)   \n",
      "\n",
      "                                        Job Description  Rating  \\\n",
      "0     ABOUT HOPPER\\r\\n\\r\\nAt Hopper, we’re on a miss...     3.5   \n",
      "1     At Noom, we use scientifically proven methods ...     4.5   \n",
      "2     Decode_M\\r\\n\\r\\nhttps://www.decode-m.com/\\r\\n\\...    -1.0   \n",
      "3     Sapphire Digital seeks a dynamic and driven mi...     3.4   \n",
      "4     Director, Data Science - (200537)\\r\\nDescripti...     3.4   \n",
      "...                                                 ...     ...   \n",
      "3904  About Us\\r\\n\\r\\nTachyon Technologies is a Digi...     4.4   \n",
      "3905  Job description\\r\\nInterpret data, analyze res...     5.0   \n",
      "3906  Job DescriptionThe Security Analytics Data Eng...     3.8   \n",
      "3907  The Security Analytics Data Engineer will inte...     4.0   \n",
      "3908  Help us transform patients' lives.\\r\\nAt UCB, ...     3.7   \n",
      "\n",
      "                                 Company Name       Location  \\\n",
      "0                               Hopper\\r\\n3.5   New York, NY   \n",
      "1                              Noom US\\r\\n4.5   New York, NY   \n",
      "2                                    Decode_M   New York, NY   \n",
      "3                     Sapphire Digital\\r\\n3.4  Lyndhurst, NJ   \n",
      "4           United Entertainment Group\\r\\n3.4   New York, NY   \n",
      "...                                       ...            ...   \n",
      "3904              Tachyon Technologies\\r\\n4.4     Dublin, OH   \n",
      "3905  Staffigo Technical Services, LLC\\r\\n5.0   Columbus, OH   \n",
      "3906                    PDS Tech, Inc.\\r\\n3.8     Dublin, OH   \n",
      "3907        Data Resource Technologies\\r\\n4.0     Dublin, OH   \n",
      "3908                               UCB\\r\\n3.7     Slough, OH   \n",
      "\n",
      "          Headquarters                     Size  Founded  Type of ownership  \\\n",
      "0     Montreal, Canada    501 to 1000 employees     2007  Company - Private   \n",
      "1         New York, NY   1001 to 5000 employees     2008  Company - Private   \n",
      "2         New York, NY        1 to 50 employees       -1            Unknown   \n",
      "3        Lyndhurst, NJ     201 to 500 employees     2019  Company - Private   \n",
      "4         New York, NY      51 to 200 employees     2007  Company - Private   \n",
      "...                ...                      ...      ...                ...   \n",
      "3904        Irving, TX     201 to 500 employees     2011  Company - Private   \n",
      "3905     Woodridge, IL      51 to 200 employees     2008  Company - Private   \n",
      "3906        Irving, TX  5001 to 10000 employees     1977  Company - Private   \n",
      "3907         Omaha, NE        1 to 50 employees       -1  Company - Private   \n",
      "3908  Brussel, Belgium  5001 to 10000 employees       -1   Company - Public   \n",
      "\n",
      "                       Industry                     Sector  \\\n",
      "0               Travel Agencies           Travel & Tourism   \n",
      "1     Health, Beauty, & Fitness          Consumer Services   \n",
      "2                            -1                         -1   \n",
      "3                      Internet     Information Technology   \n",
      "4       Advertising & Marketing          Business Services   \n",
      "...                         ...                        ...   \n",
      "3904                IT Services     Information Technology   \n",
      "3905                IT Services     Information Technology   \n",
      "3906     Staffing & Outsourcing          Business Services   \n",
      "3907                 Accounting         Accounting & Legal   \n",
      "3908  Biotech & Pharmaceuticals  Biotech & Pharmaceuticals   \n",
      "\n",
      "                         Revenue               Competitors Easy Apply  \n",
      "0       Unknown / Non-Applicable                        -1         -1  \n",
      "1       Unknown / Non-Applicable                        -1         -1  \n",
      "2       Unknown / Non-Applicable                        -1       True  \n",
      "3       Unknown / Non-Applicable      Zocdoc, Healthgrades         -1  \n",
      "4       Unknown / Non-Applicable  BBDO, Grey Group, Droga5         -1  \n",
      "...                          ...                       ...        ...  \n",
      "3904    $10 to $25 million (USD)                        -1         -1  \n",
      "3905   $50 to $100 million (USD)                        -1         -1  \n",
      "3906  $100 to $500 million (USD)                        -1         -1  \n",
      "3907  Less than $1 million (USD)                        -1         -1  \n",
      "3908      $2 to $5 billion (USD)                    AbbVie         -1  \n",
      "\n",
      "[3909 rows x 17 columns]\n"
     ]
    }
   ],
   "source": [
    "data_file = \"../data/DataScientist.csv\"\n",
    "df = pd.read_csv(data_file, encoding='utf-8')\n",
    "print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d4e879b3-216f-4d9a-88ff-3ebec6c12216",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_list_of_items(df, column_name):\n",
    "    values = df[column_name].values\n",
    "    values = [item for sublist in values for item in sublist]\n",
    "    list_of_items = list(set(values))\n",
    "    return list_of_items"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "8e1ae5ab-efd0-4ea3-aa4b-0905224bd625",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_emails(df):\n",
    "    email_regex = '[^\\s:|()\\']+@[a-zA-Z0-9\\.]+\\.[a-zA-Z]+'\n",
    "    df['emails'] = df['Job Description'].apply(lambda x: re.findall(email_regex, x))\n",
    "    emails = get_list_of_items(df, 'emails')\n",
    "    return emails"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "9ef1d94a-974d-46d1-b569-a8a7ff38f514",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['hrhelpdesk@phila.gov', 'talent@quartethealth.com', 'bflores1@lacourt.org', 'sclagett@apexsystems.com', 'mturner@simplybiotech.com', 'careers@edo.com', 'Talent.manager@techquarry.com', 'resumes@nextgentechinc.com', 'ekingery@apexsystems.com', 'talent@ebay.com', 'bwaldron@apexsystems.com', 'info@springml.com', 'chandra@ebasetek.com', 'JEND557@kellyscientific.com', 'AMunoz4@dhs.lacounty.gov', 'naga@clouddatasystemsus.com', 'careers@crowley.com', 'Ârakesh@athreyainc.com', 'brenna.boies@cybercoders.com', 'Âvikram@pddninc.net', 'accommodations@dollarshaveclub.com', 'ndarian@smithhanley.com', 'human.resources@thrivent.com', 'Melissa@kingmelissa.com', 'Vikasy@apninc.com', 'internalrecruiting@civisanalytics.com', 'dherbine@pipercompanies.com', 'bchitneedi@esharpedge.com', 'Careers.LatinAmerica@sap.com', 'Phila.OHR@phila.gov', 'accommodations@koniag.com', 'Niyaz.Ansari@artechinfo.com', 'accommodations-ext@fb.com', 'info@governmentjobs.com', 'lyssa.heine@arlut.utexas.edu', 'agiangiulio@fredbeans.com', 'jobs@primusglobal.com', 'erinn@cybercoders.com', 'employment@liprop.com', 'naveen.b@avanitechsolutions.com', 'cvwithdraw@deshaw.com', 'hari.haran@wisemen.com', 'mailbox_tas_recruit@humana.com', 'ignite@atos.netHere', 'info@njf.com', 'privacy@honest.com', 'compliance@integralads.com.To', 'LMurray@TechUSA.net', 'careers@springhealth.com', 'careers@mpulsemobile.com', 'Marcus.Quigley@cybercoders.com', 'sjohnson@arcgonline.com', 'favalos@deaconrecruiting.com', 'Âbhavna@tekleaders.com', 'hr@btsresearch.com', 'mreilly@itechsolutions.com', 'atÂshravya@techprojects.us', 'accommodations@dollarshaveclub.comPlease', 'Emily.Selzam@sig.com', 'Brooke.Schoen@kellyservices.com', 'accommodations@illumina.com', 'spam.leidos@leidos.com', 'DisabilityAccommodations@catalent.com', 'algo__autobench_research_scientist_a84151f42us@ivy.greenhouse.io', 'Shibu.S@avacend.com', 'careers@Healthfirst.org', 'rama.diverse@gmail.com', 'brenna.boies@gmail.com', 'Candidate.Accommodations@Disney.com', 'Application_Accommodation@colpal.com', 'naveen@dsiginc.com', 'Rohan.g@avacend.com', 'uschr@usc.edu', 'Sandra@OSIEngineering.com', 'recruiting@kariusdx.com', 'Motao.Zhu@NationwideChildrens.org', 'disability-accommodations@treehousefoods.com', 'shane@arksolutionsinc.com', 'compliance@integralads.com', 'jjovovic@wintrustwealth.com', 'mani@net2source.com', 'careers@3redpartners.com', 'krishna@itminds.net', 'Prajwal.Rajappa@nationwidechildrens.org', 'dani@pavetalent.com', 'backgroundcheck-inquiries@deshaw.com', 'tansib@infotreeglobal.com', 'dneto@apexsystems.com', 'faqpchr@phila.gov', 'nick.sottile@bombas.com', 'dgingerich@carisls.com', 'talent.acquisition@servicenow.com', 'CareersUS@QVC.com', 'sai@kanisol.com', 'Careers.NorthAmerica@sap.com', 'Lilly_Recruiting_Compliance@lists.lilly.com', 'abbas@infotreeglobal.com', 'usaccommodations@globalfoundries.com', 'hesquivel@deaconrecruiting.com', 'cskumar@xcelogroup.com', 'hr@texastoolbelt.com.com', 'accommodations@hioscar.com', 'accommodations-ext@snap.com', 'Josh.carlos@itbtalent.com', 'nico.molenschot@dllgroup.com', 'career@infoquestgroup.com', 'accommodations@teacherspayteachers.com', 'hr@exponent.com', 'HR-Accommodations@FireEye.com', 'sukh@arksolutionsinc.com', 'security@quartethealth.com', 'recruiting-inquiries@deshaw.com', 'vtopcagic@relevante.biz', 'RecruitingAccommodation@guidehouse.com', 'sdonovan@judge.com', 'naveen.remkumar@two95intl.com', 'jsmith@quartethealth.com', 'careers@gilead.com', 'privacy@grubhub.com', 'ukdiversity.recruitment@gsk.com', 'liza.k@wisemen.com', 'shobana@infowaygroup.com', 'kushal@nextgentechnologiesinc.com', 'employeeservices@apexsystemsinc.com', 'divyansh@net2source.com', 'sachin@apninc.com', 'rohit.mcdonald@prolim.com', 'recruitingagencies@integralads.com', 'careers@tredence.com', 'TalentAcquisition@email.chop.edu', 'accommodations@ehealthinsurance.com', 'mike.pachella@sig.com', 'TalentAcquisitionTeam@dnb.com', 'HR@dtexsystems.com', 'reasonable.accommodation@mastercard.com', 'rama@diverselynx.com', 'jason.weinstein@mondo.com', 'elizabeth@biophaseinc.com', 'kushal@apninc.com', 'ram.kishor@diverselynx.com', 'mfetterolf@apexsystems.com', 'Aleo431@KellyScientific.com', 'statjobs@austin.utexas.edu', 'vishal@arksolutionsinc.com', 'bri.haggerty@cybercoders.com', 'botten@itechsolutions.com', 'krobinson@carisls.com', 'jobs@temboo.com', 'lhatteme@usc.edu', 'Alok.Kumar@artech.com', 'ADACoordinator@bmd.hctx.net', 'workday_recruiting@iqvia.com', 'rakesh@athreyainc.com', 'dkuphal@myvoca.com', 'pranali@fisecglobal.net', 'cvshealthsupport@us.ibm.com', 'rajeshk@avtechsol.com', 'TONM369@kellyservices.com', 'debasisb@askstaffing.com', 'bridget@alphaconsulting.com', 'accommodations@relx.com.Elsevier', 'Sam.Bodian@CyberCoders.com', 'Ranjith.vemula@idctechnologies.com', 'Âanvesh@conchtech.com', 'ieo.mailbox@aero.org', 'karthik@infovision.com', 'kevin@ingenium.agency', 'shaylyn.jaworowski@mondo.com', 'hr@cloudflare.com', 'careers@greenkeytech.com', 'rkilgore@relevante.com', 'cvs_support@modernhire.com', 'careers@atai.life', 'Mallikharjun@conchtech.com', 'tom@emeraldresourcegroup.com', 'Careers.APJ@sap.com', 'andrew.cooper@modis.com', 'khartley@apexsystems.com', 'TalentAcquisition@grubhub.com', 'icfcareercenter@icf.com', 'Amit@apninc.com', 'Careers@sap.com', 'irvine@camdenkelly.com', 'vandanat@sysmind.com', 'suda@sutoer.com', 'gjimenez@uic.edu', 'hala.hillo@motektech.com', 'jfowler@deaconrecruiting.com', 'cgarza@mleehealthcare.com', 'jim@ingenium.agency', 'talentacquisition@dexcom.com', 'tim@blackfynn.com', 'chandikumar@infowaygroup.com', 'paypalglobaltalentacquisition@paypal.com', 'MyRecruiter@alere.com', 'ldmayberry@apexsystems.com', 'Accommodation.Reques@am.jll.com', 'bhavna@tekleaders.com', 'shravya@techprojects.us', 'ajones@simplybiotech.com', 'pawans@ustechsolutionsinc.com', 'vinay@logicplanet.com', 'jayaraman@infowaygroup.com', 'pathfinder@pnc.com', 'US_Employment_Compliance@cgi.com', 'staffingaadar@msd.com', 'Kerry.orton@nationwidechildrens.org', 'e.riley@nigelfrank.com', 'usrecruitment@sdl.com', 'dallas@camdenkelly.com', 'recruitinghelp@mitre.org', 'recruitment@numericjobs.com', 'tharrell@chiptonross.com', 'accommodations@relx.com', 'accommodations@climate.com', 'info@prolim.com', 'shai879@kellyservices.com', 'recruiter@avianaglobal.com', 'neeraj.h@avacend.com', 'applyassistance@danaher.com']\n"
     ]
    }
   ],
   "source": [
    "emails = get_emails(df)\n",
    "print(emails)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "a53613b7-558d-4e3f-9adb-8918bd78df2e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_urls(df):\n",
    "    url_regex = '(http[s]?://(www\\.)?[A-Za-z0-9–_\\.\\-]+\\.[A-Za-z]+/?[A-Za-z0-9$\\–_\\-\\/\\.]*)[\\.)\\\"]*'\n",
    "    df['urls'] = df['Job Description'].apply(lambda x: [x[item.span()[0]:item.span()[1]] for item in re.finditer(url_regex, x)])\n",
    "    urls = get_list_of_items(df, 'urls')\n",
    "    return urls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "e9f3f186-f0d5-4d32-9418-0f8c1ac56c9e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['https://youtu.be/c5TgbpE9UBI', 'https://www.linkedin.com/in/emma-riley-72028917a/', 'https://www.dol.gov/ofccp/regs/compliance/posters/ofccpost.htm', 'https://www.naspovaluepoint.org/portfolio/mmis-provider-services-module-2018-2028/hhs-technology-group/).', 'https://www.instagram.com/gatestonebpo', 'http://jobs.sdsu.edu', 'http://www.colgatepalmolive.com.', 'http://www1.eeoc.gov/employers/upload/eeoc_self_print_poster.pdf', 'https://www.gofundme.com/2019https', 'https://www.decode-m.com/', 'https://bit.ly/2lCOcYS', 'https://www.cslbehring.com/careers/eeo-statement', 'https://www.opm.gov/policy-data-oversight/classification-qualifications/general-schedule-qualification-standards/.', 'http://www.fujitsu.com/global/about/corporate/info/', 'https://twitter.com/FastSwitchGL', 'https://hrrm.harriscountytx.gov/Pages/Medical.aspx', 'http://www.dol.gov/ofccp/regs/compliance/posters/pdf/ofccp_eeo_supplement_final_jrf_qa_508c.pdf', 'https://teladochealth.com/en/about/', 'http://www.tetratech.com/en/benefits', 'https://bmoharriscareers.com.', 'http://eleverpro.com', 'https://www.linkedin.com/company/mindteck', 'https://www.insight.com/en_US/about/newsroom/press-releases/2019/07012019-insight-recognized-at-no14-on-crns-2019-solution-provider-500-list.html', 'https://ivs.swri.org.', 'http://www.opm.gov/policy-data-oversight/classification-qualifications/general-schedule-qualification-standards/0800/all-professional-engineering-positions-0800/', 'https://www.sparklight.com/about/careers', 'https://cgi-veterans.jobs/', 'http://adminguide.stanford.edu', 'https://usa.healthcare.siemens.com/about.', 'https://drw.com/privacy-notice.California', 'https://hirepeopletree.com/ccpa', 'https://clientapps.jobadder.com/45103/fast-switch', 'http://www-01.ibm.com/employment/us/benefitsIBM', 'http://ope.ed.gov/accreditation', 'http://www.opm.gov/policy-data-oversight/classification-qualifications/general-schedule-qualification-standards/1300/general-physical-science-series-1301/', 'http://www.hillspet.com.', 'https://www.opm.gov/policy-data-oversight/classification-qualifications/general-schedule-qualification-standards/0400/general-natural-resources-management-and-biological-sciences-series-0401/', 'https://www.opm.gov/policy-data-oversight/hiring-information/direct-hire-authority/You', 'https://www.mainlinehealth.org/careers', 'https://oim.unjspf.org/.', 'https://www.indeed.com/legal/applicant-privacy', 'https://air.jd.com/', 'http://cherokee-os.com/Pages/Home.aspx', 'https://www.kalderos.com/company/about', 'https://bloomberg.com/company/d4gx/).', 'https://www.linkedin.com/in/divyansh-srivastava-11563041/', 'http://business.pinto.co', 'https://www.facebook.com/irisplans', 'https://bit.ly/2YT6oii', 'http://www.twitter.com/forgerock.', 'http://www.cslplasma.com/careers', 'http://www.cbica.upenn.edu/', 'http://www.leoniegroup.com/careers', 'https://www.linkedin.com/in/rama-shanker-verma-aman-01a283a/', 'https://www.dol.gov/ofccp/pdf/pay-transp_', 'http://www.absorption.com', 'https://lnkd.in/eqttaa4', 'http://www.kairostech.com', 'http://www.nationwidechildrens.org/patient-centered-pediatric-research-program.', 'https://upt.ac/16ee15fc.What', 'https://www.nextstepsystems.com/jobseekers_submit_raul.html.', 'http://www.recruiter.technology', 'http://aice-eval.org/members/.', 'https://capps.taleo.net/careersection/ex/jobsearch.ftl', 'https://relevante.jobs.net/en-US/join', 'https://drw.com/privacy-notice.', 'http://www.tomsofmaine.com.', 'https://wd5.myworkday.com/usc/d/inst/1$9925/9925$46086.htmld', 'http://www.opm.gov/qualifications/standards/indexes/num-ndx.asp', 'http://www.tetratech.com/en/benefits.', 'https://www.youtube.com/user/MindteckVideos', 'https://iheartmedia.com/', 'https://e-verify.uscis.gov/esp/media/resourcesContents/EverifyPosterSpanish.pdf', 'http://ophthalmology.stanford.edu/', 'https://dellmed.utexas.edu/culture.', 'https://prolancer.com/jobs/send-proposal/301', 'http://www.med.upenn.edu/This', 'https://studentaid.ed.gov/sa/repay-loans/forgiveness-cancellation/public-service', 'http://www.med.upenn.edu/.', 'https://bfa.sdsu.edu/hr/jobs/benefits', 'https://www.monocerosbio.com', 'http://www.twdb.texas.gov/jobs/index.asp', 'https://cvshealth.com/covid-19', 'https://medium.com/gofundme-storieshttps', 'https://www.opm.gov/policy-data-oversight/classification-qualifications/general-schedule-qualification-standards/1500/computer-science-series-1550/', 'http://citylinedfw.com/about/', 'https://bit.ly/2UKaqWQ', 'https://hrrm.harriscountytx.gov/Pages/EqualEmploymentOpportunityPlan.aspx', 'https://www.linkedin.com/company/integress-inc./', 'https://corporate.jd.com/', 'https://lnkd.in/ePyq7pM', 'https://www.kalderos.com/company/culture', 'https://twitter.com/HPE_Careers', 'https://www.facebook.com/camdenkellycorp)', 'http://www.hr.sao.state.tx.us/Compensation/JobDescriptions.aspx.HHS', 'https://www.instagram.com/fastswitchgl/', 'http://www.opm.gov/qualifications/standards/index-Standards.asp', 'http://www-01.ibm.com/employment/us/benefits/', 'https://www.nextstepsystems.com', 'http://www.ed.gov/about/offices/list/ous/international/usnei/us/edlite-visitus-forrecog.html.', 'http://www.itrecruiters.technology.', 'https://www.accountingprincipals.com/privacy-policy/', 'http://www.hr.sao.state.tx.us/Compensation/JobDescriptions.aspx', 'http://www2.deloitte.com/us/en/pages/about-deloitte/articles/deloitte-corporate-citizenship.html', 'http://www.fujitsu.com/global/solutions/business-technology/ai/', 'https://www.swri.org/technical-divisions/defense-intelligence-solutions.', 'https://jobs.gartner.com/applicant-privacy-policy', 'http://adminguide.stanford.edu.', 'https://www.ecdi.org', 'http://jconnectinc.com/', 'https://www.opm.gov/policy-data-oversight/classification-qualifications/general-schedule-qualification-standards/0600/medical-technologist-series-0644/', 'https://www.amringusa.com/company-2/)', 'http://www.angeiongroup.com.', 'https://www.pddninc.com/', 'https://www.uptake.com/careers.What', 'http://ccmb.usc.edu', 'https://www.linkedin.com/in/prasadmamidela', 'https://bit.ly/2m14P0P', 'https://drw.com/california-privacy-notice.', 'http://www.mylife.com.', 'https://e-verify.uscis.gov/esp/media/resourcesContents/EverifyPosterEnglish.pdf', 'http://www.dol.gov/ofccp/regs/compliance/posters/pdf/eeopost.pdf.', 'https://www.linkedin.com/in/niyazansari/', 'https://www.soarenmanagement.com/', 'https://benefits.northropgrumman.com/us/en2/BenefitsOverview/Pages/default.aspx', 'http://www.galvanize.com/courses/data-science/', 'http://www-group.slac.stanford.edu/esh/eshmanual/pdfs/ESHch01.pdf', 'https://investor.insight.com/press-release/houston-schools-deploying-iot-enabled-building-safety-platform-improve-emergency', 'https://www.facebook.com/HPECareers', 'https://h-gac.com/careers', 'http://www2.deloitte.com/us/en/pages/careers/topics/recruiting-tips.htmlCategory', 'https://www.nvidia.com/en-us/contact/', 'http://cherokeenationbusinesses.com/federalSolutions/Pages/overview.aspx', 'https://wd5.myworkday.com/usc/d/inst/1$9925/9925$45297.htmld', 'http://cherokee-cna.com/Pages/Home.aspx', 'http://med.ucsd.edu', 'http://www.colgatebsbf.com.', 'https://join.collectivehealth.com/box', 'https://tinyurl.com/ycqdhb98', 'https://www.linkedin.com/company/fast-switch-great-lakes', 'https://match-relevant.com/your-story', 'https://imanage.com/privacy-policy/', 'http://www.med.upenn.edu/', 'https://portal.slac.stanford.edu/sites/lcls_public', 'https://www.facebook.com/SpruceCrew', 'https://investor.insight.com/press-release/insight-enterprises-acquire-pcm-inc', 'https://www.ibm.com/analytics/', 'https://www.cedars-sinai.edu/Research/Research-Labs/Guarnerio-Lab/', 'https://www.cedars-sinai.org/research/labs/svendsen.html.', 'https://www.opm.gov/policy-data-oversight/classification-qualifications/general-schedule-qualification-standards/1300/general-physical-science-series-1301/', 'https://youtu.be/QzKe9iZszD0', 'https://www.gpo.gov/fdsys/pkg/CFR-2011-title20-vol3/pdf/CFR-2011-title20-vol3-sec656-3.pdf', 'http://securityreport.uchicago.edu.', 'https://www.nextstepsystems.com/employers_submit_raul.html.', 'https://www.ibm.com/watson/', 'http://www.sorrentotherapeutics.com', 'https://www.facetec.com/', 'https://wd5.myworkday.com/iheartmedia/d/task/3005$1999.htmld', 'http://www.camadvgrp.com/', 'https://www.amazon.jobs/en/disability/us', 'https://climate.com/careers', 'https://encoura.org/privacy-policy/', 'http://instagram.com/lifeatservicetitan', 'https://wd5.myworkday.com/iheartmedia/d/task/3005$4482.htmld', 'https://apol-recruit.ucsd.edu/apply/JPF02462', 'https://www.facebook.com/TexasCounties', 'http://www.ibm.com/ibm/responsibility/corporateservicecorps', 'http://www.opm.gov/policy-data-oversight/classification-qualifications/general-schedule-qualification-standards/1500/computer-science-series-1550/', 'https://insight.hqprod.businesswire.com/press-release/insight-hosts-global-ai-competition-healthcare-innovation-cincinnati', 'http://www.capgemini.com/resources/equal-employment-opportunity-is-the-law', 'https://twitter.com/camdenkellycorp)', 'https://bmoharriscareers.com.BMO', 'https://www.consumer.ftc.gov/JobScams', 'http://www.nasdaq.com', 'http://www.phila.gov/personnel/ExamsFAQ.html', 'https://lnkd.in/e9ZpD5J', 'https://www.facebook.com/Access-Control-Group-Access-100773445034653/', 'https://www.bluenalu.com/', 'http://cherokeenationbusinesses.com/Pages/home.aspx', 'http://www.oakwoodsearch.com', 'https://www.facebook.com/viahart', 'http://www.twdb.texas.gov/jobs/benefits.asp', 'https://www.pinterest.com/mindteck/', 'http://www.ucop.edu/academic-personnel-programs/_files/apm/apm-310.pdf', 'http://www.eleverpro.com/executive-recruiter-jobs/', 'https://youtu.be/CC-seRMEo8s', 'https://www.praetorian.com/careers', 'https://www.facebook.com/ECDIoh/', 'https://whisk.com/cognitive-food-platform/)', 'https://santannaenergyservices.com/', 'http://cherokee-cnts.com/Pages/home.aspx', 'http://www.linkedin.com/company/3279780)', 'http://iheartmediacareers.com/Pages/EEO.aspx', 'https://www.ibm.com/internet-of-things/', 'https://tinyurl.com/v4p9yy7', 'http://neon.life)', 'https://www.home.neustar.', 'https://it.sdsu.edu/', 'https://e-verify.uscis.gov/esp/media/resourcesContents/WebBPPOSTERRtoWEnglishversion.pdf', 'https://www.goldmansachs.com/careers/footer/disability-statement.html', 'https://plus.google.com/u/0/106722962279155885960)', 'http://www1.eeoc.gov/employers/poster.cfm', 'https://h-gac.com/careersH-GAC', 'http://www.xome.com', 'https://jobs.btginc.com.', 'https://www.youtube.com/watch', 'https://mastercard.jobs/toronto-can/senior-data-engineer/2B400D4C9F4345E2B2039B9AB1845623/job/', 'http://www2.ed.gov/admins/finaid/accred/index.html', 'http://www.arete.comAret', 'https://www.verizonmedia.com/careers/contact-us.html)', 'https://bit.ly/2m1510', 'https://bfa.sdsu.edu/hr/jobs/job-opportunities.aspx', 'https://www.usajobs.gov/Help/working-in-government/unique-hiring-paths/students/', 'https://calendly.com/jake-villarreal/calendar', 'https://www.builtinla.com/company/servicetitan-inc', 'http://bit.ly/amazon-scot', 'https://muse.cm/2t8eGlNAttention', 'https://www.facebook.com/deeplensai/', 'http://integralads.com/', 'http://adminrecords.ucsd.edu/PPM/docs/230-311.html', 'https://jobs.lever.co/kraken.', 'http://www.fujitsu.com/global/digitalannealer/', 'https://hpi.swri.org.', 'http://ope.ed.gov/accreditation/.', 'https://www.medlypharmacy.com/', 'http://whed.net/home.php.', 'https://www.amazon.jobs/en/disability/us.', 'https://www.facebook.com/Mindteckglobal/', 'https://cherokee-federal.com/', 'https://www.county.org/', 'http://www.mdanderson.org/about-us/legal-and-policy/legal-statements/eeo-affirmative-action.html', 'https://www.expesicor.com/jobs/research-scientist-san-diego/', 'https://www.ftccomplaintassistant.gov/.', 'https://a127-jobs.nyc.gov/.', 'https://www.dascena.com/', 'http://www.tetratech.com/en/benefits.For', 'http://www.walkerelliott.com/candidates/jobs/jobDetail/default.aspx', 'http://cherokeenationbusinesses.com/federalSolutions', 'https://mobilemathlab.com/', 'https://getspruce.com', 'https://hypori.com/', 'http://www.globalfoundries.com.', 'https://www.nvidia.com/en-us/geforce/products/geforce-now/', 'https://smokefree.sdsu.edu/', 'http://www.kingmelissa.com/', 'https://apol-recruit.ucsd.edu/apply/JPF02126', 'https://safety.temple.edu/reports-logs/annual-security-report', 'https://apol-recruit.ucsd.edu/apply/JPF02186', 'http://camdenkelly.com/jobs', 'https://twitter.com/mindteck', 'http://one2one.sparklight.com/tag/coronavirus/', 'https://grants.nih.gov/grants/guide/notice-files/NOT-OD-18-210.html).', 'http://bit.ly/1mzJQeL', 'http://www.cslbehring.com', 'http://www.sandia.gov', 'https://dnb.wd1.myworkdayjobs.com/Careers', 'https://www.dol.gov/ofccp/regs/compliance/posters/pdf/eeopost.pdf', 'https://usa.healthcare.siemens.com/careers.', 'https://www.trinityra.org/joblist.htm', 'http://www.hr.sao.state.tx.us/Compensation/JobDescriptions.aspx.', 'http://www.opm.gov/policy-data-oversight/classification-qualifications/general-schedule-qualification-standards/', 'https://careers.nordstrom.com/', 'https://e-verify.uscis.gov/esp/media/resourcesContents/WebBPPOSTERRtoWSpanishversion.pdf', 'http://jobs.apple.com/)', 'https://www.opm.gov/policy-data-oversight/pay-leave/salaries-wages/IF', 'https://www.cdc.gov/hai/epicenters/chicago.html.', 'https://apol-recruit.ucsd.edu/apply/JPF02177', 'https://stockedrobotics.freshteam.com/jobs', 'http://indeedhi.re/IndeedBenefits', 'http://www.compqsoft.com/current-openings.html', 'https://eng.joinroot.com/', 'https://www-03.ibm.com/press/us/en/pressrelease/50744.wss', 'https://qbrc.swmed.edu/', 'https://prolancer.com/jobs/send-proposal/298', 'https://www.themuse.com/profiles/servicetitan', 'http://www.parallelpartners.com.', 'https://www.ebayinc.com/our-company/diversity-inclusion/.', 'http://www.avanitechsolutions.com', 'https://muse.cm/2t8eGlN', 'http://www-01.ibm.com/employment/us/benefits', 'https://www.techatbloomberg.com/nlp/', 'http://cherokeenationbusinesses.com/careers/Pages/home.aspx', 'https://www.ers.texas.gov/', 'https://www.arena.io/about/careers.html', 'https://mastercard.jobs/toronto-can/lead-data-engineer/567DD39603B141C5AAF86389682B751D/job/', 'http://www-hr.ucsd.edu/saa/nondiscr.htmlUC', 'https://www.tandemdiabetes.com/careers/life-at-tandem', 'https://argogroup.wd1.myworkdayjobs.com/Argo', 'https://investor.insight.com/press-release/insight-recognized-magic-quadrant-managed-workplace-services-north-america', 'http://www.corelogic.com/privacy.aspxConnect', 'https://www.opm.gov/policy-data-oversight/classification-qualifications/general-schedule-qualification-standards/', 'http://www.dol.gov/ofccp/regs/compliance/posters/pdf/eeopost.pdf', 'https://www.mindteck.com/career/life-at-mindteck.html', 'https://www.deeplens.ai/', 'http://www.tapestry.com/', 'https://www.opm.gov/policy-data-oversight/classification-qualifications/general-schedule-qualification-standards/0600/medical-technologist-series-0644/In', 'https://www.vdriveitsolutions.com/', 'https://blog.activision.com', 'https://vimeo.com/311952506', 'https://searchitchannel.techtarget.com/feature/Data-center-infrastructure-spending-gets-AI-boost', 'http://www.ibm.com/ibm/responsibility/initiatives.html']\n"
     ]
    }
   ],
   "source": [
    "urls = get_urls(df)\n",
    "print(urls)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f19ea69-66c5-497d-a0e3-754c86b50df3",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
